[FRAUD] Data (9.13_df50, edges built differently)

Author

김보람

Published

September 13, 2023

imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import networkx as nx
import sklearn
import xgboost as xgb

# sklearn
from sklearn import model_selection # train/test split utilities
from sklearn import ensemble # RF, GBM
from sklearn import metrics
from sklearn import linear_model # LogisticRegression (used below as sklearn.linear_model)
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

# embedding 
from node2vec import Node2Vec
from node2vec.edges import HadamardEmbedder, AverageEmbedder, WeightedL1Embedder, WeightedL2Embedder

# gnn
import torch
import torch_geometric
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
def build_graph_bipartite(df_input, graph_type=nx.Graph()):
    df=df_input.copy()
    mapping={x:node_id for node_id, x in enumerate(set(df["cc_num"].values.tolist()+\
                                                      df["merchant"].values.tolist()))}
    
    df["from"]=df["cc_num"].apply(lambda x:mapping[x])  #엣지의 출발점
    df["to"]=df["merchant"].apply(lambda x:mapping[x])  #엣지의 도착점
    
    df = df[['from', 'to', "amt", "is_fraud"]].groupby(['from','to']).agg({"is_fraud":"sum","amt":"sum"}).reset_index()
    df["is_fraud"]=df["is_fraud"].apply(lambda x:1 if x>0 else 0)
    
    G=nx.from_edgelist(df[["from","to"]].values, create_using=graph_type)
    
    nx.set_edge_attributes(G,{(int(x["from"]),int(x["to"])):x["is_fraud"] for idx, x in df[["from","to","is_fraud"]].iterrows()}, "label")  #엣지 속성 설정,각 속성의 사기 여부부     
    nx.set_edge_attributes(G,{(int(x["from"]),int(x["to"])):x["amt"] for idx,x in df[["from","to","amt"]].iterrows()}, "weight") # 엣지 속성 설정, 각 엣지의 거래 금액

    return G
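
As a quick illustration, here is how build_graph_bipartite behaves on a tiny made-up DataFrame (the values are hypothetical; only the four columns the function uses are needed):

# Hypothetical toy transactions: two cards, two merchants.
toy = pd.DataFrame({
    "cc_num":   [111, 111, 222],
    "merchant": ["m_a", "m_b", "m_a"],
    "amt":      [10.0, 25.0, 7.5],
    "is_fraud": [0, 1, 0],
})
G_toy = build_graph_bipartite(toy)
print(G_toy.number_of_nodes(), G_toy.number_of_edges())  # 4 nodes (2 cards + 2 merchants), 3 edges
print(nx.get_edge_attributes(G_toy, "label"))            # per-edge fraud indicator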


def build_graph_tripartite(df_input, graph_type=nx.Graph()):
    df=df_input.copy()
    mapping={x:node_id for node_id, x in enumerate(set(df.index.values.tolist() + 
                                                       df["cc_num"].values.tolist() +
                                                       df["merchant"].values.tolist()))}
    df["in_node"]= df["cc_num"].apply(lambda x: mapping[x])
    df["out_node"]=df["merchant"].apply(lambda x:mapping[x])
    
        
    G=nx.from_edgelist([(x["in_node"], mapping[idx]) for idx, x in df.iterrows()] +\
                        [(x["out_node"], mapping[idx]) for idx, x in df.iterrows()], create_using=graph_type)
    
    nx.set_edge_attributes(G,{(x["in_node"], mapping[idx]):x["is_fraud"] for idx, x in df.iterrows()}, "label")     
    nx.set_edge_attributes(G,{(x["out_node"], mapping[idx]):x["is_fraud"] for idx, x in df.iterrows()}, "label")   
    nx.set_edge_attributes(G,{(x["in_node"], mapping[idx]):x["amt"] for idx, x in df.iterrows()}, "weight")  
    nx.set_edge_attributes(G,{(x["out_node"], mapping[idx]):x["amt"] for idx, x in df.iterrows()}, "weight")

    return G
    
    
def down_sample_textbook(df):
    df_majority = df[df.is_fraud==0].copy()
    df_minority = df[df.is_fraud==1].copy()
    df_maj_downsampled = sklearn.utils.resample(df_majority, n_samples=len(df_minority), replace=False, random_state=42)
    df_downsampled = pd.concat([df_minority, df_maj_downsampled])
    return df_downsampled

def embedding(Graph):
    # Graph -> X (feature)
    _edgs = list(Graph.edges)
    subGraph = Graph.edge_subgraph([_edgs[x] for x in range(len(Graph.edges))]).copy()
    subGraph.add_nodes_from(list(set(Graph.nodes) - set(subGraph.nodes)))    
    embedded = AverageEmbedder(Node2Vec(subGraph, weight_key='weight').fit(window=10).wv)
    X = [embedded[str(_edgs[x][0]), str(_edgs[x][1])] for x in range(len(Graph.edges))]
    # Graph -> y (label)
    y = np.array(list(nx.get_edge_attributes(Graph, "label").values()))
    return X,y 

def anal(df):
    Graph = build_graph_bipartite(df)
    X, y = embedding(Graph)  # embedding() returns one feature vector and one label per edge
    # split the edge embeddings into train/test so the 4-way unpacking below is valid
    X, XX, y, yy = sklearn.model_selection.train_test_split(X, y, random_state=42)
    lrnr = RandomForestClassifier(n_estimators=100, random_state=42) 
    lrnr.fit(X,y)
    yyhat = lrnr.predict(XX)
    df = pd.DataFrame({
        'acc':[sklearn.metrics.accuracy_score(yy,yyhat)], 
        'pre':[sklearn.metrics.precision_score(yy,yyhat)], 
        'rec':[sklearn.metrics.recall_score(yy,yyhat)],
        'f1':[sklearn.metrics.f1_score(yy,yyhat)]}
    )    
    return df

def our_sampling1(df):
    cus_list = set(df.query('is_fraud==1').cc_num.tolist())
    return df.query("cc_num in @cus_list")
fraudTrain = pd.read_csv("~/Desktop/fraudTrain.csv").iloc[:,1:]
fraudTrain = fraudTrain.assign(trans_date_trans_time=pd.to_datetime(fraudTrain.trans_date_trans_time))
fraudTrain
trans_date_trans_time cc_num merchant category amt first last gender street city ... lat long city_pop job dob trans_num unix_time merch_lat merch_long is_fraud
0 2019-01-01 00:00:00 2.703190e+15 fraud_Rippin, Kub and Mann misc_net 4.97 Jennifer Banks F 561 Perry Cove Moravian Falls ... 36.0788 -81.1781 3495 Psychologist, counselling 1988-03-09 0b242abb623afc578575680df30655b9 1325376018 36.011293 -82.048315 0
1 2019-01-01 00:00:00 6.304230e+11 fraud_Heller, Gutmann and Zieme grocery_pos 107.23 Stephanie Gill F 43039 Riley Greens Suite 393 Orient ... 48.8878 -118.2105 149 Special educational needs teacher 1978-06-21 1f76529f8574734946361c461b024d99 1325376044 49.159047 -118.186462 0
2 2019-01-01 00:00:00 3.885950e+13 fraud_Lind-Buckridge entertainment 220.11 Edward Sanchez M 594 White Dale Suite 530 Malad City ... 42.1808 -112.2620 4154 Nature conservation officer 1962-01-19 a1a22d70485983eac12b5b88dad1cf95 1325376051 43.150704 -112.154481 0
3 2019-01-01 00:01:00 3.534090e+15 fraud_Kutch, Hermiston and Farrell gas_transport 45.00 Jeremy White M 9443 Cynthia Court Apt. 038 Boulder ... 46.2306 -112.1138 1939 Patent attorney 1967-01-12 6b849c168bdad6f867558c3793159a81 1325376076 47.034331 -112.561071 0
4 2019-01-01 00:03:00 3.755340e+14 fraud_Keeling-Crist misc_pos 41.96 Tyler Garcia M 408 Bradley Rest Doe Hill ... 38.4207 -79.4629 99 Dance movement psychotherapist 1986-03-28 a41d7549acf90789359a9aa5346dcb46 1325376186 38.674999 -78.632459 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1048570 2020-03-10 16:07:00 6.011980e+15 fraud_Fadel Inc health_fitness 77.00 Haley Wagner F 05561 Farrell Crescent Annapolis ... 39.0305 -76.5515 92106 Accountant, chartered certified 1943-05-28 45ecd198c65e81e597db22e8d2ef7361 1362931649 38.779464 -76.317042 0
1048571 2020-03-10 16:07:00 4.839040e+15 fraud_Cremin, Hamill and Reichel misc_pos 116.94 Meredith Campbell F 043 Hanson Turnpike Hedrick ... 41.1826 -92.3097 1583 Geochemist 1999-06-28 c00ce51c6ebb7657474a77b9e0b51f34 1362931670 41.400318 -92.726724 0
1048572 2020-03-10 16:08:00 5.718440e+11 fraud_O'Connell, Botsford and Hand home 21.27 Susan Mills F 005 Cody Estates Louisville ... 38.2507 -85.7476 736284 Engineering geologist 1952-04-02 17c9dc8b2a6449ca2473726346e58e6c 1362931711 37.293339 -84.798122 0
1048573 2020-03-10 16:08:00 4.646850e+18 fraud_Thompson-Gleason health_fitness 9.52 Julia Bell F 576 House Crossroad West Sayville ... 40.7320 -73.1000 4056 Film/video editor 1990-06-25 5ca650881b48a6a38754f841c23b77ab 1362931718 39.773077 -72.213209 0
1048574 2020-03-10 16:08:00 2.283740e+15 fraud_Buckridge PLC misc_pos 6.81 Shannon Williams F 9345 Spencer Junctions Suite 183 Alpharetta ... 34.0770 -84.3033 165556 Prison officer 1997-12-27 8d0a575fe635bbde12f1a2bffc126731 1362931730 33.601468 -83.891921 0

1048575 rows × 22 columns

GNN attempt

_df1 = fraudTrain[fraudTrain["is_fraud"] == 0].sample(frac=0.20, random_state=42)
_df2 = fraudTrain[fraudTrain["is_fraud"] == 1]
df02 = pd.concat([_df1,_df2])
df02.shape
(214520, 22)
df50 = down_sample_textbook(df02)
df50.shape
(12012, 22)
df50 = df50.reset_index()
N = len(df50)

train/test split

df50_tr,df50_test = sklearn.model_selection.train_test_split(df50, random_state=42)
df50_tr.shape, df50_test.shape
((9009, 23), (3003, 23))
train_mask = [i in df50_tr.index for i in range(N)]
test_mask = [i in df50_test.index for i in range(N)]
train_mask = np.array(train_mask)
test_mask = np.array(test_mask)
train_mask.sum(), test_mask.sum()
(9009, 3003)
train_mask.shape, test_mask.shape
((12012,), (12012,))
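
The list-comprehension masks above scan df50_tr.index once per node (O(N²) overall); an equivalent vectorized construction, as a sketch:

# Equivalent boolean masks: True where the node index appears in the split's index.
train_mask = np.isin(np.arange(N), df50_tr.index)
test_mask = np.isin(np.arange(N), df50_test.index)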

edge_index construction

def compute_time_difference(group):
    # For every ordered pair of transactions within one cc_num group, record
    # (index_i, index_j, |t_i - t_j|), with the gap in nanoseconds (Timestamp.value).
    n = len(group)
    result = []
    for i in range(n):
        for j in range(n):
            time_difference = abs(group.iloc[i].trans_date_trans_time.value - group.iloc[j].trans_date_trans_time.value)
            result.append([group.iloc[i].name, group.iloc[j].name, time_difference])
    return result
groups = df50.groupby('cc_num')
edge_index_list_plus = [compute_time_difference(group) for _, group in groups]
edge_index_list_plus_flat = [item for sublist in edge_index_list_plus for item in sublist]
edge_index_list_plus_nparr = np.array(edge_index_list_plus_flat)
np.save('edge_index_list_plus50.npy', edge_index_list_plus_nparr)
# edge_index_list_plus = []
# for i in range(N):
#     for j in range(N):
#         if df50['cc_num'][i] != df50['cc_num'][j]:  # if the cc_num values differ
#             time_difference = 0
#         else:
#             time_difference = (df50['trans_date_trans_time'][i] - df50['trans_date_trans_time'][j]).total_seconds()
#         edge_index_list_plus.append([i, j, time_difference])
#         np.save('edge_index_list_plus50.npy', edge_index_list_plus)

# # edge_index_list_plus = np.load('edge_index_list_plus.npy')
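
compute_time_difference is quadratic in group size with Python-level loops. A broadcasting version of the same computation, as a sketch (compute_time_difference_fast is a name introduced here for illustration), returns the same (i, j, |Δt|) triples per group:

def compute_time_difference_fast(group):
    # Same triples as compute_time_difference, via NumPy broadcasting.
    t = group.trans_date_trans_time.values.astype('datetime64[ns]').astype(np.int64)  # ns since epoch
    idx = group.index.to_numpy()
    diff = np.abs(t[:, None] - t[None, :])          # pairwise |Δt| in nanoseconds
    ii, jj = np.meshgrid(idx, idx, indexing='ij')   # row/column index labels
    return np.column_stack([ii.ravel(), jj.ravel(), diff.ravel()])

# e.g. np.vstack([compute_time_difference_fast(g) for _, g in groups])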
edge_index = np.array(edge_index_list_plus_nparr)
edge_index.shape
(200706, 3)
theta = edge_index[:, 2].mean()  # kernel bandwidth -- assumed here, since theta was never defined in this post
weight = (np.exp(-edge_index[:,2]/theta) != 1)*(np.exp(-edge_index[:,2]/theta))
weight
array([0.        , 0.99946775, 0.99020659, ..., 0.98799491, 0.02078565,
       0.        ])
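
The (… != 1) factor zeroes out every pair whose kernel value is exactly 1, i.e. pairs with zero time difference, which includes all self-pairs i = j. A quick check on made-up gaps:

dt = np.array([0.0, 1e9, 1e12])  # hypothetical gaps in ns: 0 s, 1 s, ~17 min
k = np.exp(-dt / theta)
print((k != 1) * k)              # first entry is masked to 0.0, the rest keep exp(-dt/theta)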
edge_index = np.column_stack((edge_index, weight))
edge_index = np.delete(edge_index, 2, axis=1)
edge_index
array([[1.02300000e+03, 1.02300000e+03, 0.00000000e+00],
       [1.02300000e+03, 1.02400000e+03, 9.99467748e-01],
       [1.02300000e+03, 1.02800000e+03, 9.90206590e-01],
       ...,
       [1.19440000e+04, 9.78200000e+03, 9.87994908e-01],
       [1.19440000e+04, 1.17670000e+04, 2.07856509e-02],
       [1.19440000e+04, 1.19440000e+04, 0.00000000e+00]])
edge_index.shape
(200706, 3)
edge_index = edge_index.tolist()
mean_ = np.array(edge_index)[:,2].mean()  # mean edge weight, for reference (the median is used below)

- median

medi_ = np.median(np.array(edge_index)[:,2])
selected_edges = [(int(row[0]), int(row[1])) for row in edge_index if row[2] > medi_]
edge_index_selected = torch.tensor(selected_edges, dtype=torch.long).t()
edge_index_selected.shape
torch.Size([2, 100350])

Data construction (x, edge_index, y)

x = torch.tensor(df50['amt'].values, dtype=torch.float).reshape(-1,1)
y = torch.tensor(df50['is_fraud'].values, dtype=torch.int64)
data = torch_geometric.data.Data(x=x, edge_index = edge_index_selected, y=y, train_mask = train_mask, test_mask = test_mask)
data
Data(x=[12012, 1], edge_index=[2, 100350], y=[12012], train_mask=[12012], test_mask=[12012])
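
A couple of quick consistency checks on the assembled Data object (a sketch; just manual assertions):

# Every edge endpoint must be a valid node index, and the two masks
# together must cover all 12012 nodes (9009 train + 3003 test).
assert int(data.edge_index.max()) < data.num_nodes
assert data.train_mask.sum() + data.test_mask.sum() == data.num_nodes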

gnn


class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(1, 16)   # 1 input feature (amt) -> 16 hidden channels
        self.conv2 = GCNConv(16, 2)   # 16 hidden channels -> 2 classes (fraud / legit)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
for epoch in range(400):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
model.eval()
pred = model(data).argmax(dim=1)
correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
acc = int(correct) / int(np.array(data.test_mask).sum())
print(f'Accuracy: {acc:.4f}')
Accuracy: 0.8768
predicted_labels = pred[data.test_mask]
true_labels = data.y[data.test_mask]
precision = precision_score(true_labels, predicted_labels, average='macro')
recall = recall_score(true_labels, predicted_labels, average='macro')
f1 = f1_score(true_labels, predicted_labels, average='macro')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
Precision: 0.8799
Recall: 0.8763
F1 Score: 0.8764
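
To line the GNN up with the baselines below, its scores can be collected into the same one-row DataFrame format (a sketch; _results1 is a name introduced here, and note these are macro-averaged while analyses 2-6 report the default binary-averaged scores):

_results1 = pd.DataFrame({'accuracy_score': [acc],
                          'precision_score': [precision],
                          'recall_score': [recall],
                          'f1_score': [f1]}, index=['Analysis 1'])
_results1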

Summary

Analysis    Train     Test       Model                 Features   Notes
Analysis 1  df50_tr   df50_test  GNN                   amt
Analysis 2  df50_tr   df50_test  Logistic regression   amt
Analysis 3  df50_tr   df50_test  SVM                   amt
Analysis 4  df50_tr   df50_test  Random forest         amt
Analysis 5  df50_tr   df50_test  Boosting (XGBoost)    amt
Analysis 6  df50_tr   df50_test  Naive Bayes           amt
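
Analyses 2-6 below all follow the same fit/predict/score pattern, so they could equally be run in one loop; a sketch, assuming the imports above:

learners = {'Analysis 2': sklearn.linear_model.LogisticRegression(),
            'Analysis 3': SVC(kernel='linear'),
            'Analysis 4': RandomForestClassifier(),
            'Analysis 5': xgb.XGBClassifier(),
            'Analysis 6': GaussianNB()}
X = np.array(df50_tr.loc[:, ['amt']]);   y = np.array(df50_tr.is_fraud)
XX = np.array(df50_test.loc[:, ['amt']]); yy = np.array(df50_test.is_fraud)
metric_fns = [sklearn.metrics.accuracy_score, sklearn.metrics.precision_score,
              sklearn.metrics.recall_score, sklearn.metrics.f1_score]
rows = []
for name, lrnr in learners.items():
    yyhat = lrnr.fit(X, y).predict(XX)
    rows.append(pd.DataFrame({m.__name__: [m(yy, yyhat).round(6)] for m in metric_fns}, index=[name]))
pd.concat(rows)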

Analysis 2 (logistic regression)

X = np.array(df50_tr.loc[:,['amt']])
XX = np.array(df50_test.loc[:,['amt']])
y = np.array(df50_tr.is_fraud)
yy = np.array(df50_test.is_fraud)
lrnr = sklearn.linear_model.LogisticRegression()
lrnr.fit(X,y)
#thresh = y.mean()
#yyhat = (lrnr.predict_proba(XX)> thresh)[:,-1]
yyhat = lrnr.predict(XX) 
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
_results2 = pd.DataFrame({m.__name__:[m(yy,yyhat).round(6)] for m in metrics}, index=['Analysis 2'])
_results2

Analysis 3 (support vector machine)

X = np.array(df50_tr.loc[:, ['amt']])
XX = np.array(df50_test.loc[:, ['amt']])
y = np.array(df50_tr.is_fraud)
yy = np.array(df50_test.is_fraud)
lrnr = SVC(kernel='linear')  
lrnr.fit(X,y)
yyhat = lrnr.predict(XX)
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
_results3 = pd.DataFrame({m.__name__:[m(yy,yyhat).round(6)] for m in metrics}, index=['Analysis 3'])
_results3

Analysis 4 (random forest)

X = np.array(df50_tr.loc[:, ['amt']])
XX = np.array(df50_test.loc[:, ['amt']])
y = np.array(df50_tr.is_fraud)
yy = np.array(df50_test.is_fraud)
lrnr = RandomForestClassifier()  
lrnr.fit(X, y)
yyhat = lrnr.predict(XX)
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
_results4 = pd.DataFrame({m.__name__:[m(yy,yyhat).round(6)] for m in metrics}, index=['Analysis 4'])
_results4

Analysis 5 (boosting)

X = np.array(df50_tr.loc[:, ['amt']])
XX = np.array(df50_test.loc[:, ['amt']])
y = np.array(df50_tr.is_fraud)
yy = np.array(df50_test.is_fraud)
lrnr = xgb.XGBClassifier()  
lrnr.fit(X, y)
yyhat = lrnr.predict(XX)
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
_results5 = pd.DataFrame({m.__name__:[m(yy,yyhat).round(6)] for m in metrics}, index=['Analysis 5'])
_results5

Analysis 6 (Naive Bayes)

X = np.array(df50_tr.loc[:, ['amt']])
XX = np.array(df50_test.loc[:, ['amt']])
y = np.array(df50_tr.is_fraud)
yy = np.array(df50_test.is_fraud)
lrnr = GaussianNB() 
lrnr.fit(X, y)
yyhat = lrnr.predict(XX)
metrics = [sklearn.metrics.accuracy_score,
           sklearn.metrics.precision_score,
           sklearn.metrics.recall_score,
           sklearn.metrics.f1_score]
_results6 = pd.DataFrame({m.__name__:[m(yy,yyhat).round(6)] for m in metrics}, index=['Analysis 6'])
_results6
!git add .
!git commit -m .
[main f9769619] .
 8 files changed, 8800 insertions(+), 2514 deletions(-)
 create mode 100644 "posts/GNN/FRAUD/.ipynb_checkpoints/230822 \353\215\260\354\235\264\355\204\260(6, df02)-Copy1-checkpoint.ipynb"
 create mode 100644 "posts/GNN/FRAUD/.ipynb_checkpoints/230822 \353\215\260\354\235\264\355\204\260(6, df02)-checkpoint.ipynb"
 create mode 100644 "posts/GNN/FRAUD/.ipynb_checkpoints/230823 \353\215\260\354\235\264\355\204\260(7, df50_com\354\234\274\353\241\234 93\355\215\274 accuracy)_guebin-checkpoint.ipynb"
 create mode 100644 "posts/GNN/FRAUD/.ipynb_checkpoints/230825 \353\215\260\354\235\264\355\204\260(8, df02)\354\273\244\353\204\220\354\243\275\354\235\214.out-checkpoint.ipynb"
 create mode 100644 "posts/GNN/FRAUD/.ipynb_checkpoints/230827 \353\215\260\354\235\264\355\204\260(9, df50 mask\353\247\214\353\223\244\354\227\210\353\212\224\353\215\260 \352\262\260\352\263\274\352\260\222\354\235\264 \353\213\254\353\235\274).out-checkpoint.ipynb"
 delete mode 100644 "posts/GNN/FRAUD/230823 \353\215\260\354\235\264\355\204\260(7, df50_com\354\234\274\353\241\234 93\355\215\274 accuracy)_guebin-Copy1.out.ipynb"
 rewrite "posts/GNN/FRAUD/230825 \353\215\260\354\235\264\355\204\260(8, df02)\354\273\244\353\204\220\354\243\275\354\235\214.out.ipynb" (99%)
 rewrite "posts/GNN/FRAUD/230827 \353\215\260\354\235\264\355\204\260(9, df50 mask\353\247\214\353\223\244\354\227\210\353\212\224\353\215\260 \352\262\260\352\263\274\352\260\222\354\235\264 \353\213\254\353\235\274).out.ipynb" (99%)
!git push
Enumerating objects: 14, done.
Counting objects: 100% (14/14), done.
Delta compression using up to 16 threads
Compressing objects: 100% (9/9), done.
Writing objects: 100% (9/9), 5.06 KiB | 5.06 MiB/s, done.
Total 9 (delta 7), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (7/7), completed with 5 local objects.
To https://github.com/boram-coco/coco.git
   1bbb61bf..f9769619  main -> main
!quarto publish gh-pages --no-prompt --no-browser
From https://github.com/boram-coco/coco
 * branch              gh-pages   -> FETCH_HEAD
Rendering for publish:

[  1/182] posts/Python/Summer Program-Python Programming Day1 Quiz.ipynb
...
[182/182] index.qmd

Preparing worktree (resetting branch 'gh-pages'; was at ee6ded06)
Branch 'gh-pages' set up to track remote branch 'gh-pages' from 'origin'.
HEAD is now at ee6ded06 Built site for gh-pages
error: the following files have local modifications:
    posts/Advanved Probability Theory/2023_04_05_5wk_checkpoint.html
    posts/Applied statistics/03. CH0304_simulation_files/figure-html/cell-53-output-1.png
    posts/Applied statistics/03. CH0304_simulation_files/figure-html/cell-6-output-1.png
    posts/Applied statistics/05. 가변수 실습.html
    posts/Applied statistics/06. 회귀진단 실습.html
    posts/Applied statistics/06. 회귀진단 실습.out.ipynb
    posts/Applied statistics/06. 회귀진단 실습_files/figure-html/cell-33-output-1.png
    posts/Applied statistics/06. 회귀진단 실습_files/figure-html/cell-46-output-1.png
    posts/Applied statistics/06. 회귀진단 실습_files/figure-html/cell-47-output-1.png
    posts/Applied statistics/06. 회귀진단 실습_files/figure-html/cell-5-output-1.png
    posts/Applied statistics/06. 회귀진단 실습_files/figure-html/cell-7-output-1.png
    posts/Applied statistics/07. 변수선택 실습.html
    posts/Applied statistics/07. 변수선택 실습.out.ipynb
(use --cached to keep the file, or -f to force removal)
[gh-pages db1a3b4d] Built site for gh-pages
 186 files changed, 5197 insertions(+), 6147 deletions(-)
origin  https://github.com/boram-coco/coco.git (fetch)
origin  https://github.com/boram-coco/coco.git (push)
To https://github.com/boram-coco/coco.git
   ee6ded06..db1a3b4d  HEAD -> gh-pages

NOTE: GitHub Pages sites use caching so you might need to click the refresh
button within your web browser to see changes after deployment.

[✓] Published to https://boram-coco.github.io/coco/

NOTE: GitHub Pages deployments normally take a few minutes (your site updates
will be visible once the deploy completes)